 
. * Sample: white males with at least a bachelor's degree residing in California
. use ACS2018_CA, clear   

. 
. * hourly wages
. * hw divides incwage by (uhrswork*52); presumably annual wage income over
. * usual weekly hours times 52 weeks -- TODO confirm the variable definitions
. 
. gen hw = incwage/(uhrswork*52)

. gen lhw = ln(hw)
(1 missing value generated)

. 
. * drop the single observation for which the log wage is missing
. drop if lhw == .
(1 observation deleted)

. 
. * distribution of log hourly wages at the individual level
. sum lhw

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
         lhw |     17,815     3.69217    .8462979  -2.747271   7.907043

. 
. * group means
. * collapse replaces the data with one observation per age x educd x degfieldd
. * cell, holding the cell mean of lhw
. 
. collapse lhw, by(age educd degfieldd)

. 
. * distribution of the cell means
. sum lhw

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
         lhw |      7,073    3.693299    .7314974  -2.747271   7.130899

. 
. * regression models
. * seven specifications of increasing size, from additive factor dummies (m1)
. * to the full three-way interaction (m7)
. * every specification names educd in full: it is the only education variable
. * kept by the collapse above, so writing educ would rely on Stata's variable
. * abbreviation and break under "set varabbrev off"
. 
. local m1 = "i.educd i.age i.degfieldd"

. local m2 = "i.educd##i.age i.degfieldd"

. local m3 = "i.educd##i.degfieldd i.age"

. local m4 = "i.educd##i.age i.educd##i.degfieldd"

. local m5 = "i.educd##i.age i.age##i.degfieldd"

. local m6 = "i.educd##i.degfieldd i.age##i.degfieldd"

. local m7 = "i.educd##i.age##i.degfieldd"

. 
. * penalty value passed to every elasticnet/lassoselect call below
. local ld0 = 1e-7

. 
. * random split of the cells: sample==1 gets 20%, sample==2 gets 80%
. * NOTE(review): no seed is set in the visible part of this log before
. * splitsample or the rnormal() draws further down, so a rerun will not
. * reproduce this split -- consider set seed or splitsample's rseed() option
. splitsample, generate(sample) split(0.2 0.8)

. 
. * the 20% share is labelled Training, the 80% share Testing
. label define svalues 1 "Training" 2 "Testing"

. label values sample svalues

. 
. * tabulate the split and keep the cell counts in matrix tmp
. tab sample, matcell(tmp)

     sample |      Freq.     Percent        Cum.
------------+-----------------------------------
   Training |      1,415       20.01       20.01
    Testing |      5,658       79.99      100.00
------------+-----------------------------------
      Total |      7,073      100.00

. * NOTE(review): n_train and n_test are not referenced again in the visible
. * part of this log
. local n_train = tmp[1,1]

. local n_test = tmp[2,1]

. 
. * Add noise to the train sample
. * normal with mean zero and standard deviation 0.5
. * e_tmp1 is an independent draw per observation; e_c is the mean of a second
. * draw e_tmp2 within each degfieldd value, so it is constant within degree field
. 
. gen e_tmp1 = rnormal(0, 0.5)

. gen e_tmp2 = rnormal(0, 0.5)

. bysort degfieldd: egen e_c = mean(e_tmp2)

. 
. * vary the weight c = j/100 placed on the degree-field-level noise e_c
. * (j = 0, 25, 50, 75); for each j:
. *   - e_cluster_j standardises (1-c)*e_tmp1 + c*e_c and then halves it
. *   - lhw_cluster_j equals lhw, plus that noise in the training sample only
. *   - specifications m1-m7 are fit on the training sample with alpha(0) and
. *     lambda fixed at ld0; the covariate count e(k_allvars) is kept as p1-p7
. *   - lassogof reports MSE by sample; odd rows of r(table) are taken as the
. *     training MSE and even rows as the testing MSE
. *   - p, training MSE and testing MSE are collected in matrix results and
. *     written out with mat2txt under the name results_j
. * NOTE(review): matrices mse and rsq are created but not used afterwards in
. * the visible part of this log
. forvalues j=0(25)75 {
  2.         
. egen e_cluster_`j' = std((1-(`j'/100))*e_tmp1+(`j'/100)*e_c)
  3. replace e_cluster_`j' = e_cluster_`j'/2 
  4. 
. gen lhw_cluster_`j' = lhw
  5. replace lhw_cluster_`j' = lhw + e_cluster_`j' if sample == 1
  6. 
.         forvalues i=1/7 {
  7.         
.                 quietly elasticnet linear lhw_cluster_`j' `m`i'' ///
>                            if sample == 1, alpha(0) selection(none) grid(1,min(`ld0'))
  8.                 lassoselect alpha = 0 lambda = `ld0'                    
  9.                 estimates store ridgeless`i'
 10.                 scalar p`i' = e(k_allvars)
 11.         
.         }
 12. 
. lassogof ridgeless1 ridgeless2 ridgeless3 ridgeless4 ///
>                  ridgeless5 ridgeless6 ridgeless7, over(sample)
 13. 
. matrix res = r(table)
 14. matrix mse = res[1..14,1]
 15. matrix rsq = res[1..14,2]               
 16. 
. matrix mse_in = res[1,1]\res[3,1]\res[5,1]\res[7,1]\res[9,1] \res[11,1]\res[13,1]
 17. matrix mse_out= res[2,1]\res[4,1]\res[6,1]\res[8,1]\res[10,1]\res[12,1]\res[14,1]
 18. 
. matrix pm = (p1, p2, p3, p4, p5, p6, p7)'
 19. 
. matrix results = (pm, mse_in, mse_out)
 20. 
. matrix colnames results = p train_error test_error
 21. matrix rownames results = spec1 spec2 spec3 spec4 spec5 spec6 spec7
 22. 
. 
. matrix list results
 23.                  
. mat2txt, matrix(results) saving("results_`j'") replace ///
>         title(in-sample vs. out-of-sample MSE with c=`j'/100)  
 24. 
. }       
(7,073 real changes made)
(1,415 real changes made)
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected

Penalized coefficients
-------------------------------------------------------------
Name             sample |         MSE    R-squared        Obs
------------------------+------------------------------------
ridgeless1              |
               Training |    .5664053       0.2773      1,415
                Testing |    .4984406       0.0571      5,658
------------------------+------------------------------------
ridgeless2              |
               Training |    .5206223       0.3357      1,415
                Testing |     .558816      -0.0572      5,658
------------------------+------------------------------------
ridgeless3              |
               Training |    .4290459       0.4526      1,415
                Testing |    .6557615      -0.2406      5,658
------------------------+------------------------------------
ridgeless4              |
               Training |    .3785127       0.5171      1,415
                Testing |    .8122957      -0.5367      5,658
------------------------+------------------------------------
ridgeless5              |
               Training |    .0324297       0.9586      1,415
                Testing |    .8677153      -0.6415      5,658
------------------------+------------------------------------
ridgeless6              |
               Training |    .0174099       0.9778      1,415
                Testing |    .8188231      -0.5490      5,658
------------------------+------------------------------------
ridgeless7              |
               Training |    .0000452       0.9999      1,415
                Testing |    .5519577      -0.0442      5,658
-------------------------------------------------------------

results[7,3]
                 p  train_error   test_error
spec1          209     .5664053    .49844057
spec2          391    .52062227    .55881601
spec3          598    .42904595    .65576151
spec4          778    .37851274    .81229568
spec5         1640     .0324297    .86771528
spec6         1754    .01740991    .81882315
spec7         2182    .00004521    .55195775
(7,073 real changes made)
(1,415 real changes made)
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected

Penalized coefficients
-------------------------------------------------------------
Name             sample |         MSE    R-squared        Obs
------------------------+------------------------------------
ridgeless1              |
               Training |     .566066       0.2787      1,415
                Testing |    .4992685       0.0555      5,658
------------------------+------------------------------------
ridgeless2              |
               Training |    .5203088       0.3370      1,415
                Testing |    .5600454      -0.0595      5,658
------------------------+------------------------------------
ridgeless3              |
               Training |    .4287659       0.4536      1,415
                Testing |     .654348      -0.2379      5,658
------------------------+------------------------------------
ridgeless4              |
               Training |    .3782575       0.5180      1,415
                Testing |    .8106045      -0.5335      5,658
------------------------+------------------------------------
ridgeless5              |
               Training |     .032409       0.9587      1,415
                Testing |    .8703117      -0.6464      5,658
------------------------+------------------------------------
ridgeless6              |
               Training |    .0173974       0.9778      1,415
                Testing |    .8137831      -0.5395      5,658
------------------------+------------------------------------
ridgeless7              |
               Training |    .0000452       0.9999      1,415
                Testing |    .5423348      -0.0260      5,658
-------------------------------------------------------------

results[7,3]
                 p  train_error   test_error
spec1          209    .56606597     .4992685
spec2          391    .52030877    .56004544
spec3          598    .42876591    .65434796
spec4          778    .37825754    .81060448
spec5         1640    .03240898    .87031169
spec6         1754    .01739745    .81378311
spec7         2182    .00004523    .54233478
(7,073 real changes made)
(1,415 real changes made)
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected

Penalized coefficients
-------------------------------------------------------------
Name             sample |         MSE    R-squared        Obs
------------------------+------------------------------------
ridgeless1              |
               Training |    .5627373       0.2853      1,415
                Testing |    .5033637       0.0478      5,658
------------------------+------------------------------------
ridgeless2              |
               Training |    .5172325       0.3431      1,415
                Testing |    .5646696      -0.0682      5,658
------------------------+------------------------------------
ridgeless3              |
               Training |    .4260182       0.4589      1,415
                Testing |    .6587809      -0.2463      5,658
------------------------+------------------------------------
ridgeless4              |
               Training |    .3757522       0.5228      1,415
                Testing |    .8102524      -0.5328      5,658
------------------------+------------------------------------
ridgeless5              |
               Training |    .0322033       0.9591      1,415
                Testing |    .8666778      -0.6396      5,658
------------------------+------------------------------------
ridgeless6              |
               Training |    .0172752       0.9781      1,415
                Testing |    .8109177      -0.5341      5,658
------------------------+------------------------------------
ridgeless7              |
               Training |    .0000451       0.9999      1,415
                Testing |    .5393324      -0.0203      5,658
-------------------------------------------------------------

results[7,3]
                 p  train_error   test_error
spec1          209    .56273727    .50336368
spec2          391    .51723251    .56466955
spec3          598    .42601817    .65878085
spec4          778    .37575217    .81025238
spec5         1640     .0322033    .86667783
spec6         1754    .01727519    .81091772
spec7         2182    .00004511    .53933239
(note: file results_50.txt not found)
(7,073 real changes made)
(1,415 real changes made)
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected
ID = 1  alpha = 0.000  lambda = 1.00e-07 selected

Penalized coefficients
-------------------------------------------------------------
Name             sample |         MSE    R-squared        Obs
------------------------+------------------------------------
ridgeless1              |
               Training |    .5359656       0.3284      1,415
                Testing |    .5309522      -0.0044      5,658
------------------------+------------------------------------
ridgeless2              |
               Training |    .4924695       0.3829      1,415
                Testing |    .5918621      -0.1197      5,658
------------------------+------------------------------------
ridgeless3              |
               Training |    .4038943       0.4939      1,415
                Testing |     .679021      -0.2846      5,658
------------------------+------------------------------------
ridgeless4              |
               Training |    .3555612       0.5545      1,415
                Testing |    .8232746      -0.5574      5,658
------------------------+------------------------------------
ridgeless5              |
               Training |    .0305391       0.9617      1,415
                Testing |    .8655885      -0.6375      5,658
------------------------+------------------------------------
ridgeless6              |
               Training |    .0162887       0.9796      1,415
                Testing |    .7983961      -0.5104      5,658
------------------------+------------------------------------
ridgeless7              |
               Training |    .0000438       0.9999      1,415
                Testing |    .5483812      -0.0374      5,658
-------------------------------------------------------------

results[7,3]
                 p  train_error   test_error
spec1          209    .53596557    .53095225
spec2          391    .49246948    .59186213
spec3          598    .40389429    .67902099
spec4          778    .35556118    .82327458
spec5         1640    .03053914    .86558852
spec6         1754    .01628874    .79839612
spec7         2182    .00004381    .54838118
(note: file results_75.txt not found)

.                  
. * close the log; cap (capture) suppresses the error if no log is open
. cap log close
